## Authors: Alexander Herzog and Kenneth Benoit
## Date: May 30, 2015
## Replication file for JOP article "The Most Unkindest Cuts: Speaker Selection and Expressed Government Dissent During Economic Crisis"

rm(list = ls(all = TRUE))

library(rjags)
library(ggplot2)

### PATHS ########################################
dataSub <- "./generated_data/working_data_speakers.RData"
posteriorData <- "./estimated_models/joint_model3_multilevel_posterior.RData"
##################################################

load(dataSub)
data <- dataSub
load(posteriorData)

# factors in original data
data$crisis <- factor(data$crisis, levels=c("0", "1"), labels=c("Pre-crisis", "Crisis"))

# convert coda-object to matrix object
mcmcChain <- as.matrix(joint3.posterior)

# extract sampled values
as <- rowMeans(mcmcChain[, grepl("as\\[", colnames(mcmcChain))])
ao <- rowMeans(mcmcChain[, grepl("ao\\[", colnames(mcmcChain))])
bs <- mcmcChain[,grepl("bs\\[", colnames(mcmcChain))]
bo <- mcmcChain[,grepl("bo\\[", colnames(mcmcChain))]
rho <- mcmcChain[,"rho"]
sigma <- mcmcChain[,"sigma"]

# label coefficients
colnames(bs) <- c("unemployment",
                 "safetyScaled",
                 "seniorityYearsScaled",
                 "party.leader",
                 "government",
                 "cabMember",
                 "log.party.size",
                 "log.debate.days",
                 "crisis",
                 "crisis*unemployment",
                 "crisis*safetyScaled",
                 "crisis*government",
                 "government*unemployment",
                 "government*safetyScaled",
                 "crisis*government*unemployment",
                 "crisis*government*safetyScaled")

colnames(bo) <- c("unemployment",
                 "safetyScaled",
                 "seniorityYearsScaled",
                 "government",
                 "cabMember",
                 "crisis",
                 "crisis*unemployment",
                 "crisis*safetyScaled",
                 "crisis*government",
                 "government*unemployment",
                 "government*safetyScaled",
                 "crisis*government*unemployment",
                 "crisis*government*safetyScaled")


# Predicted probabilities for changes in unemployment rate
# --------------------------------------------------------
# create matrix of x values for prediction
startVal=min(data$unemployment)
endVal=max(data$unemployment)

N <- 100
pred <- with(data, expand.grid(
         unemployment = seq(startVal, endVal, (endVal-startVal)/(N-1)),
         safetyScaled = 0,
         seniorityYearsScaled = 0,
         party.leader = 0,
         government = c(0, 1),
         cabMember = 0,
         log.party.size = mean(data$log.party.size),
         log.debate.days = mean(data$log.debate.days),
         crisis = c(0, 1))
)


# define matrix for posterior predicted y values at each x value
sample.size <- nrow(bs)
yPred <- matrix( 0 , nrow=nrow(pred) , ncol=sample.size )
yPred2 <- matrix( 0 , nrow=nrow(pred) , ncol=sample.size )

imr <- matrix( 0 , nrow=nrow(pred) , ncol=sample.size )
w <- matrix( 0 , nrow=nrow(pred) , ncol=sample.size )

# generate posterior predicted y values for each x value
for (i in 1:sample.size) {
   

    # calculate linear prediction from selection eq
    selectionEq.lp <- as[i] +
        bs[i, "unemployment"] * pred$unemployment +
        bs[i, "safetyScaled"] * pred$safetyScaled +
        bs[i, "seniorityYearsScaled"] * pred$seniorityYearsScaled +
        bs[i, "party.leader"] * pred$party.leader +
        bs[i, "government"] * pred$government +
        bs[i, "cabMember"] * pred$cabMember +
        bs[i, "log.party.size"] * pred$log.party.size +
        bs[i, "log.debate.days"] * pred$log.debate.days +
        bs[i, "crisis"] * pred$crisis +
        bs[i, "crisis*unemployment"] * pred$crisis * pred$unemployment +
        bs[i, "crisis*safetyScaled"] * pred$crisis * pred$safetyScaled +
        bs[i, "crisis*government"] * pred$crisis * pred$government +
        bs[i, "government*unemployment"] * pred$government * pred$unemployment +
        bs[i, "government*safetyScaled"] * pred$government * pred$safetyScaled +
        bs[i, "crisis*government*unemployment"] * pred$crisis * pred$government * pred$unemployment +
        bs[i, "crisis*government*safetyScaled"] * pred$crisis * pred$government * pred$safetyScaled


    # calculate IMR
    imr[,i] <- dnorm(selectionEq.lp) / pnorm(selectionEq.lp)

    # calculate weight
    w[,i] <- rho[i] * sigma[i] * (imr[,i]^2 - (-selectionEq.lp) * imr[,i])

    # calculate prediction for variables that are both in the
    # selection and outcome equation   
    yPred[,i] <- ao[i] +
        (bo[i, "unemployment"] - bs[i, "unemployment"] * w[,i]) * pred$unemployment + 
        (bo[i, "safetyScaled"] - bs[i, "safetyScaled"] * w[,i]) * pred$safetyScaled + 
        (bo[i, "seniorityYearsScaled"] - bs[i, "seniorityYearsScaled"] * w[,i]) * pred$seniorityYearsScaled + 
        (bo[i, "government"] - bs[i, "government"] * w[,i]) * pred$government + 
        (bo[i, "cabMember"] - bs[i, "cabMember"] * w[,i]) * pred$cabMember + 
        (bo[i, "crisis"] - bs[i, "crisis"] * w[,i]) * pred$crisis + 
        (bo[i, "crisis*unemployment"] - bs[i, "crisis*unemployment"] * w[,i]) * pred$crisis * pred$unemployment + 
        (bo[i, "crisis*safetyScaled"] - bs[i, "crisis*safetyScaled"] * w[,i]) * pred$crisis * pred$safetyScaled + 
        (bo[i, "crisis*government"] - bs[i, "crisis*government"] * w[,i]) * pred$crisis * pred$government + 
        (bo[i, "government*unemployment"] - bs[i, "government*unemployment"] * w[,i]) * pred$government * pred$unemployment + 
        (bo[i, "government*safetyScaled"] - bs[i, "government*safetyScaled"] * w[,i]) * pred$government * pred$safetyScaled + 
        (bo[i, "crisis*government*unemployment"] - bs[i, "crisis*government*unemployment"] * w[,i]) * pred$crisis * pred$government * pred$unemployment + 
        (bo[i, "crisis*government*safetyScaled"] - bs[i, "crisis*government*safetyScaled"] * w[,i]) * pred$crisis * pred$government * pred$safetyScaled


    
    # calculate prediction from outcome equation (for comparison)
    yPred2[,i] <- ao[i] +
        bo[i, "unemployment"] * pred$unemployment +
        bo[i, "safetyScaled"] * pred$safetyScaled +
        bo[i, "seniorityYearsScaled"] * pred$seniorityYearsScaled +
        bo[i, "government"] * pred$government +
        bo[i, "cabMember"] * pred$cabMember +
        bo[i, "crisis"] * pred$crisis +
        bo[i, "crisis*unemployment"] * pred$crisis * pred$unemployment +
        bo[i, "crisis*safetyScaled"] * pred$crisis * pred$safetyScaled +
        bo[i, "crisis*government"] * pred$crisis * pred$government +
        bo[i, "government*unemployment"] * pred$government * pred$unemployment +
        bo[i, "government*safetyScaled"] * pred$government * pred$safetyScaled +
        bo[i, "crisis*government*unemployment"] * pred$crisis * pred$government * pred$unemployment +
        bo[i, "crisis*government*safetyScaled"] * pred$crisis * pred$government * pred$safetyScaled
    
    }


pred$fitted <- rowMeans(yPred)
pred$fitted2 <- rowMeans(yPred2)
pred$ci.lower <- apply(yPred, 1, function(x) quantile(x, prob=c(0.025)))
pred$ci.upper <- apply(yPred, 1, function(x) quantile(x, prob=c(0.975)))



# plot predicted values with ci
# -----------------------------
pred$government <- factor(pred$government, levels=c("1", "0"), labels=c("Government", "Opposition"))

pred$crisis <- factor(pred$crisis, levels=c("0", "1"), labels=c("Pre-crisis", "Crisis"))

xLabMin <- round(unique(data$lr_prop[data$unemployment==startVal]), 1)
xLabMax <- round(unique(data$lr_prop[data$unemployment==endVal]), 1)

numb.breaks <- 10
breaks.scaled.x <- round(seq(startVal, endVal, (endVal-startVal)/(numb.breaks-1)), 2)
breaks.labels <- paste0(round(seq(xLabMin, xLabMax, (xLabMax-xLabMin)/(numb.breaks-1)), 2)*100, "%")

p <- ggplot(pred, aes(x=unemployment, y=fitted, group=government)) +
    # theme
    theme_bw() +
    # scales
    scale_x_continuous(breaks=breaks.scaled.x, labels=breaks.labels) +
    scale_y_continuous(breaks=seq(-0.75,0.25,0.25), limits=c(-0.75,0.3)) +
    # facet
    facet_wrap(~ crisis) +
    # lines and ci
    geom_line(aes(lty = government), size=1) +
    geom_ribbon(aes(ymin = ci.lower, ymax = ci.upper), alpha = 0.2) +
    labs(linetype="") +
    # distribution of actual data    
    geom_rug(data=data, aes(x=unemployment, y=textscore), side="b") +
    # title
    labs(title="Effect of constituency unemployment on expressed position") +
    # axis labels
    xlab("Constituency unemployment") +
    ylab("Predicted textscore") +
    # theme values (text size, margins, position of legends, etc.)
    theme(
        legend.position="bottom",
        title = element_text(size=14, vjust=1.5),
        axis.title.x = element_text(vjust=-0.5),
        axis.title.y = element_text(size=12, vjust=0.4),
        axis.text.x = element_text(size=12),
        axis.text.y = element_text(size=12),
        strip.text.x = element_text(size=12),
        legend.text = element_text(size=12)
        )

pdf("./plots/predicted_position_joint_model.pdf", 12, 6)
print(p)
dev.off()


